import numpy as np
from scipy import misc
import scipy.io as scio
import matplotlib.pyplot as plt
from sklearn import metrics
import h5py
from sklearn.cluster import KMeans, MiniBatchKMeans, Birch, DBSCAN
def averageDistToCenter(X, C):
    """Return the mean Euclidean distance from the columns of X to C.

    X stores one point per column (axis 0 is reduced as the feature axis,
    axis 1 is counted as the number of points).  C is subtracted from X by
    broadcasting; callers in this file pass it as a single column.

    When X has no columns the divisor is clamped to 1, so the result is 0
    instead of a division by zero.
    """
    offsets = X - C
    # Euclidean norm of every column, reduced along the feature axis.
    per_point = np.sqrt(np.sum(offsets ** 2, axis=0))
    total = np.sum(per_point, axis=0)
    # Clamp the point count so an empty selection does not divide by zero.
    n_points = np.maximum(np.size(X, axis=1), 1)
    return total / n_points
def dunnIndex(X, C, labels):
    """Return a centre-based Dunn-style index for a clustering.

    The value is the smallest Euclidean distance between any two cluster
    centres divided by the largest per-cluster mean point-to-centre
    distance.

    X holds one point per column and C one centre per column.  Cluster k is
    taken to be the points where ``labels == k`` for k in
    ``range(C.shape[1])``; any other label value is ignored.
    """
    centers = C.T
    # Pairwise squared distances between centres (k x k, zero diagonal).
    sq_gaps = np.sum(
        (centers[np.newaxis, :, :] - centers[:, np.newaxis, :]) ** 2.,
        axis=-1,
    ).T
    # Overwrite the zero diagonal with the maximum so that the minimum below
    # is taken over distinct pairs of centres only.
    np.fill_diagonal(sq_gaps, np.max(sq_gaps))
    min_center_gap = np.sqrt(np.min(sq_gaps))

    spreads = [
        averageDistToCenter(X[:, labels == k], C[:, k, np.newaxis])
        for k in range(C.shape[1])
    ]
    # The leading 0.0 keeps the original starting value of the running max.
    widest = np.max([0.0] + spreads)
    return min_center_gap / widest
def daviesBouldinIndex(X, C, labels):
    """Return the Davies-Bouldin index for a clustering.

    For every cluster i the worst (largest) ratio ``(s_i + s_j) / d_ij`` over
    all other clusters j is taken, where ``s`` is the mean point-to-centre
    distance of a cluster and ``d_ij`` the Euclidean distance between the
    centres.  The index is the mean of those worst ratios.

    X holds one point per column and C one centre per column.  Cluster k is
    taken to be the points where ``labels == k`` for k in
    ``range(C.shape[1])``.
    """
    n_clusters = C.shape[1]
    centers = C.T
    # Pairwise Euclidean distances between the centres (k x k).
    center_gaps = np.sqrt(
        np.sum(
            (centers[np.newaxis, :, :] - centers[:, np.newaxis, :]) ** 2.,
            axis=-1,
        ).T
    )

    # Mean point-to-centre distance of every cluster.
    scatter = np.array(
        [
            averageDistToCenter(X[:, labels == k], C[:, k, np.newaxis])
            for k in range(n_clusters)
        ],
        dtype=float,
    )

    cluster_ids = np.arange(n_clusters)
    worst_ratio = np.empty(n_clusters)
    for i in range(n_clusters):
        # Every cluster except i itself.
        others = cluster_ids != i
        ratios = (scatter[i] + scatter[others]) / center_gaps[others, i]
        worst_ratio[i] = np.max(ratios, axis=-1)
    return np.sum(worst_ratio) / n_clusters
# Load the 'ImageRaw' dataset from the HDF5-backed data.mat file.
# NOTE(review): an earlier scipy.io.loadmat('data') attempt used to be left
# commented out here; presumably the file is a MATLAB v7.3 (HDF5) file that
# loadmat cannot read -- confirm.
with h5py.File('data.mat', 'r') as file:
    # Read the whole dataset in one call.  Iterating the dataset into a
    # Python list and stacking it afterwards yields the same array, but reads
    # one slice at a time and keeps two full copies alive during stacking.
    image = file['ImageRaw'][()]
image.shape  # bare expression: only displayed in an interactive session

# Two candidate flattenings of the image into a (1000*1500, 285) table.
# NOTE(review): presumably one row per pixel and one column per band, with
# `image` shaped (285, rows, cols) -- confirm against the actual file.
# `data` flattens the transposed array directly; `data2` flattens each
# leading-axis slice first and then transposes, so the row order of the two
# tables differs.
data = np.reshape(image.T, (1000*1500, 285))
data2 = np.reshape(image, (285, 1000*1500)).T
data2.shape  # bare expression: only displayed in an interactive session
# --- K-means with 5 clusters ------------------------------------------------
k_means = KMeans(init='k-means++', n_clusters=5, n_init=1)
# Only the `data2` layout is fitted.  Every fit() call replaces the result of
# the previous one, so the earlier extra fit on `data` was discarded
# immediately and only cost a full clustering pass.
k_means.fit(data2)
print('Dunn index: ', dunnIndex(data2.T, k_means.cluster_centers_.T, k_means.labels_))
print('Davies-Bouldin index: ', daviesBouldinIndex(data2.T, k_means.cluster_centers_.T, k_means.labels_))

# Coordinate lookup tables for plotting: row p of areaMap is
# (p // 1500, p % 1500); areaMap2 holds the same pairs with the two columns
# swapped.  Built with np.indices instead of a 1.5-million-element Python
# list comprehension; the resulting values and row order are identical.
areaMap = np.ascontiguousarray(np.indices((1000, 1500)).reshape(2, -1).T)
areaMap2 = np.ascontiguousarray(areaMap[:, ::-1])
areaMap  # bare expression: only displayed in an interactive session

# One scatter layer per cluster, first in areaMap coordinates and then in the
# swapped areaMap2 coordinates.  The cluster count is read from the fitted
# model so it cannot drift from n_clusters above.
for coords in (areaMap, areaMap2):
    for cluster_id in range(k_means.n_clusters):
        members = k_means.labels_ == cluster_id
        plt.scatter(coords[members, 0], coords[members, 1], marker='.')
    plt.show()
# --- K-means with 10 clusters -----------------------------------------------
k_means10 = KMeans(init='k-means++', n_clusters=10, n_init=1)
k_means10.fit(data2)
print('Dunn index: ', dunnIndex(data2.T, k_means10.cluster_centers_.T, k_means10.labels_))
print('Davies-Bouldin index: ', daviesBouldinIndex(data2.T, k_means10.cluster_centers_.T, k_means10.labels_))
np.unique(k_means10.labels_)  # bare expression: only displayed interactively

# Draw every cluster as its own point layer, once per coordinate table
# (areaMap first, then the column-swapped areaMap2), one figure each.
for coords in (areaMap, areaMap2):
    for cluster_id in range(10):
        members = k_means10.labels_ == cluster_id
        plt.plot(coords[members, 0], coords[members, 1], '.', markersize=1)
    plt.show()
# --- K-means with 20 clusters -----------------------------------------------
k_means20 = KMeans(init='k-means++', n_clusters=20, n_init=1)
k_means20.fit(data2)
print('Dunn index: ', dunnIndex(data2.T, k_means20.cluster_centers_.T, k_means20.labels_))
print('Davies-Bouldin index: ', daviesBouldinIndex(data2.T, k_means20.cluster_centers_.T, k_means20.labels_))

# One figure per coordinate table (areaMap, then areaMap2); inside each
# figure every cluster is drawn as a separate layer of small dots.
for coords in (areaMap, areaMap2):
    for cluster_id in range(20):
        members = k_means20.labels_ == cluster_id
        plt.plot(coords[members, 0], coords[members, 1], '.', markersize=1)
    plt.show()
# --- K-means with 30 clusters -----------------------------------------------
k_means30 = KMeans(init='k-means++', n_clusters=30, n_init=1)
k_means30.fit(data2)
print('Dunn index: ', dunnIndex(data2.T, k_means30.cluster_centers_.T, k_means30.labels_))
print('Davies-Bouldin index: ', daviesBouldinIndex(data2.T, k_means30.cluster_centers_.T, k_means30.labels_))

# Overview: every cluster as its own layer of small dots.
for cluster_id in range(30):
    members = k_means30.labels_ == cluster_id
    plt.plot(areaMap[members, 0], areaMap[members, 1], '.', markersize=1)
plt.show()

# Larger figure showing only hand-picked clusters, each with a fixed
# matplotlib format string (colour letter + '.' marker).
# NOTE(review): the estimator above is created without random_state, so
# which cluster receives which id presumably changes from run to run and
# these ids match one particular run only -- confirm.
highlighted = (
    (7, 'g.'),
    (24, 'g.'),
    (10, 'b.'),
    (5, 'y.'),
    (14, 'k.'),
    (26, 'k.'),
)
plt.figure(figsize=(12,15))
for cluster_id, fmt in highlighted:
    members = k_means30.labels_ == cluster_id
    plt.plot(areaMap[members, 0], areaMap[members, 1], fmt, markersize=1)
plt.show()
# --- Mini-batch K-means with 50 clusters ------------------------------------
mini_batch_k_means50 = MiniBatchKMeans(
    init='k-means++',
    n_clusters=50,
    batch_size=50,
    n_init=1,
    max_no_improvement=10,
    verbose=0,
    random_state=0,
)
mini_batch_k_means50.fit(data2)
print('Dunn index: ', dunnIndex(data2.T, mini_batch_k_means50.cluster_centers_.T, mini_batch_k_means50.labels_))
print('Davies-Bouldin index: ', daviesBouldinIndex(data2.T, mini_batch_k_means50.cluster_centers_.T, mini_batch_k_means50.labels_))

# Overview: every cluster as its own layer; the cluster id is printed before
# each layer is drawn.
for cluster_id in range(50):
    print(cluster_id)
    members = mini_batch_k_means50.labels_ == cluster_id
    plt.plot(areaMap[members, 0], areaMap[members, 1], '.', markersize=1)
plt.show()

# Larger figure showing only hand-picked clusters, each with a fixed
# matplotlib format string (colour letter + '.' marker), drawn in this order.
highlighted = (
    (7, 'b.'),
    (27, 'y.'),
    (35, 'y.'),
    (43, 'k.'),
    (41, 'k.'),
    (48, 'g.'),
    (39, 'g.'),
    (38, 'g.'),
    (31, 'g.'),
    (8, 'g.'),
    (28, 'k.'),
)
plt.figure(figsize=(12,15))
for cluster_id, fmt in highlighted:
    members = mini_batch_k_means50.labels_ == cluster_id
    plt.plot(areaMap[members, 0], areaMap[members, 1], fmt, markersize=1)
plt.show()
# --- DBSCAN -----------------------------------------------------------------
dbscan = DBSCAN(eps=500, min_samples=10)
dbscan.fit(data2)
np.unique(dbscan.labels_)  # bare expression: only displayed interactively

# Plot four hand-picked labels, each with a fixed matplotlib format string,
# in this drawing order.
highlighted = (
    (0, 'b.'),
    (2, 'k.'),
    (1, 'g.'),
    (29, 'g.'),
)
plt.figure(figsize=(12,15))
for cluster_id, fmt in highlighted:
    members = dbscan.labels_ == cluster_id
    plt.plot(areaMap[members, 0], areaMap[members, 1], fmt, markersize=1)
plt.show()

# DBSCAN exposes no cluster centres here, so the centroid (per-feature mean)
# of labels 0, 1 and 2 is computed by hand, one centroid per column.
# NOTE(review): only these three labels enter the two indices below even
# though label 29 is plotted above -- confirm that this is intended.
centers = np.empty((data2.shape[1],3))
for column in (0, 1, 2):
    cluster_points = data2.T[:, dbscan.labels_ == column]
    centers[:, column] = np.sum(cluster_points, axis=1) / cluster_points.shape[1]
print('Dunn index: ', dunnIndex(data2.T, centers, dbscan.labels_))
print('Davies-Bouldin index: ', daviesBouldinIndex(data2.T, centers, dbscan.labels_))